Introduction

This Mark Down file contains code and results evaluating the effect of wildfire smoke on cardiopulmonary morbidity and mortality along the Colorado front range communities from 2010 to 2015. The hypothesis for this study is that we will see an increase in the risk of cardiopulmonary morbidity and mortality associated with an increase in PM2.5 due to wildfire smoke.

Study Area Map

Study bounding box.

# clip by bbox function ------
bbox_clip <- function(sf, bbox) {
  # find the CRS of the sf object
  crs <- sf::st_crs(sf)$proj4string
  # create matrix
  x <- c(bbox[1], bbox[1], bbox[3], bbox[3], bbox[1])
  y <- c(bbox[2], bbox[4], bbox[4], bbox[2], bbox[2])
  coords <- matrix(cbind(x, y), ncol=2)
  # create polygon and assign same coord crs as sf object
  coords_poly <- sp::Polygon(coords)
  bbox_poly <- sp:: SpatialPolygons(list(sp::Polygons(list(coords_poly),
    ID = "bbox")), proj4string = sp::CRS(crs))
  # convert to sf feature
  bbox_sf <- st_as_sf(bbox_poly)
  # clip sf object
  clipped_sf <- sf[bbox_sf,]
  return(clipped_sf)
}

# clipping bounding box
study_bbox <- st_bbox(c(xmin=-105.3, xmax=-104.5, ymax=41, ymin = 38))

Reading in county shapefiles.

# county subset
county_sub_name <- c("Larimer", "Weld", "Boulder", "Broomfield", "Adams", 
                "Denver", "Jefferson", "Arapahoe", "Douglas", "El Paso",
                "Pueblo")

# read in county shapefile and subset to only colorado fips
co_county_sf <- st_read("../../data/shapefiles/us_county", 
                        layer = "us_county") %>%
  # limit to colorado
  filter(STATEFP == "08") %>% 
  # create fips variable
  mutate(fips = paste0(STATEFP, COUNTYFP))
## Reading layer `us_county' from data source `/Users/ganr1/Documents/public_repo/colorado_wildfire/data/shapefiles/us_county' using driver `ESRI Shapefile'
## Simple feature collection with 3108 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -124.7631 ymin: 24.5231 xmax: -66.9499 ymax: 49.38436
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
# save wgs84 crs
wgs <- st_crs(co_county_sf)

# county clip
county_clip <- bbox_clip(co_county_sf, study_bbox)

# extract county names
county_text <- county_clip %>% 
  st_transform(wgs) %>% 
  st_centroid() %>% 
  st_coordinates() %>% 
  cbind(., as.character(county_clip$NAME)) %>%
  as_data_frame() %>% 
  rename(lon = X, lat = Y, county = V3) %>% 
  mutate(lon = as.numeric(lon), lat = as.numeric(lat))

Reading in Front Range Grid points that will define who is included in the study area.

# read in front range grid csv
frontrange_grid <- read_csv('../../data/smoke/front_range_grid.csv') 

# read in grid simple features and limit to the front range grid
study_grid <- read_sf('../../data/shapefiles/co_krig_grid/', crs = wgs) %>% 
  filter(GRID_ID %in% frontrange_grid$GRID_ID)

nrow(frontrange_grid)
## [1] 75
# print out bounding box for front range grid
ggplot(frontrange_grid, 
       aes(x=lon, y=lat, label=paste(round(lon,2), round(lat,2), sep=', '))) +
  geom_text(size = 4) +
  xlim(-105.5,-104.4)

Note 2020-04-18:

Ideally I’d like to give the exact coordinates for the bounding box for Kate, but I don’t remember how to exacly subset. I’ve printed out the Lon/Lat coords of the grid cells I used in air pollution measures for the front range. Keep in mind I projected it as a WGS 84 to align with other shapefiles. But it looks like the bounding box cells would be Top left: -105.39, 40.63 Top right: -104.62, 40.67 Bottom left: -105.24, 38.59 Bottom right: -104.49, 38.62

Interstate shapefile limited to I-25 and I-70.

# read in colorado roads shapefile
interstate <- st_read(paste0("../../data/shapefiles/tl_2015_08_prisecroads"), 
                      layer = "tl_2015_08_prisecroads") %>% 
  # filter to I25 lines
  filter(RTTYP == "I") %>% 
  st_transform(crs = wgs)
## Reading layer `tl_2015_08_prisecroads' from data source `/Users/ganr1/Documents/public_repo/colorado_wildfire/data/shapefiles/tl_2015_08_prisecroads' using driver `ESRI Shapefile'
## Simple feature collection with 3230 features and 4 fields
## geometry type:  LINESTRING
## dimension:      XY
## bbox:           xmin: -109.0602 ymin: 36.99251 xmax: -102.0417 ymax: 41.00307
## epsg (SRID):    4269
## proj4string:    +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
# interstate clip
i_clip <- bbox_clip(interstate, study_bbox)

Reading in city boundary simple features.

# read in city polygons 2010
city <- st_read("../../data/shapefiles/Colorado_City_Point_Locations/", 
                layer = "Colorado_City_Point_Locations") %>% 
  st_transform(crs = wgs)
## Reading layer `Colorado_City_Point_Locations' from data source `/Users/ganr1/Documents/public_repo/colorado_wildfire/data/shapefiles/Colorado_City_Point_Locations' using driver `ESRI Shapefile'
## Simple feature collection with 587 features and 8 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: -109.0146 ymin: 37.00304 xmax: -102.0804 ymax: 40.98833
## epsg (SRID):    4269
## proj4string:    +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
# limit cities 
city_points <- city %>% 
  filter(NAME %in% c("FORT COLLINS", "PUEBLO", "GREELEY", "BOULDER", "DENVER",
                     "COLORADO SPRINGS")) %>% 
  mutate(city = stringr::str_to_title(NAME))

Reading 2015 population estimate geotiff for Colorado. I made this file for the ALA project from the SEDAC 2015 global estimate.

# read colorado 2015 population raster
co_pop_2015 <- raster::raster("../../data/shapefiles/2015-ColoradoPopDensity.tif")
# extract bbox of clipped county subset
fr_extent <- raster::extent(st_bbox(county_clip)[c(1,3,2,4)])
# raster
fr_pop_2015 <- raster::crop(co_pop_2015, fr_extent)

# i'm going to create a shapefile/simple features; easier to plot
front_range_pop_sf <- st_as_sf(raster::rasterToPolygons(fr_pop_2015)) %>% 
  rename(popden = X2015.ColoradoPopDensity) %>% 
  # filtering to cells > 100 
  filter(popden > 100)

# cut once more to county shapefile so that populations outside the counties are not present
pop_clip <- front_range_pop_sf[county_clip,]

Map of Study Area

Study map that will contain an inset map to highlight the area of interest.

Inset map.

# create county inset map
inset_map <- ggplot(co_county_sf) +
  geom_sf(fill = "transparent", color = "black", size = 0.1) +
  geom_sf(data = study_grid, 
          fill = 'red', 
          color = 'transparent', 
          alpha = 0.5) +
  ggtitle("Colorado: Study Area") +
  theme(plot.title = element_text(size = 12),
        panel.background = element_blank(),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        plot.background = element_rect(fill = "transparent", 
                                       color = "transparent"))
inset_map

Study map.

# plot map
study_map <-ggplot(pop_clip) +
  # start with plot of simple features of populations
  geom_sf(aes(fill=popden), color = NA) +
  # define aesthetics of poipulation
    scale_fill_gradient(name = expression("Population per km"^2), 
      low = "#26d0ce", high = "#1a2980") +
  # plot lines of counties
  geom_sf(data = county_clip, color = "black", 
          fill = "transparent", size = 0.5) +
  # plot i-25 and i-70
  geom_sf(data = i_clip, aes(color = "Interstate"), show.legend = "line") +
  # plot study grid
  geom_sf(data = study_grid, aes(color = "Study Grid"), fill = "transparent", 
          size = 0.1, show.legend = "line") +
  # custom colors for interstate and study grid
  scale_color_manual(values = c("Interstate" = "#0f9b0f", 
                                "Study Grid" = "red"), 
    labels = c("Interstate", "Study Grid"),
    name = "Boundary") + 
  # major city points and names
  geom_point(data = city_points, aes(x = LONG, y = LAT), color = "#3f2b96") +
  geom_text(data = city_points, aes(x = LONG, y = LAT, label = city), 
             color = "#3f2b96", size = 4, hjust = 1, vjust = -0.6) +
  # theme
  theme(panel.background = element_blank(),
        panel.grid.major = element_line(colour = 'transparent'),
        panel.grid.minor = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank(),
        legend.key = element_rect(fill = NA, colour = NA, size = 0.25))

study_map

Final map with inset. Using the grid package.I may add the AQS monitors to this map.

# save map
#tiff(filename = "../analysis/figures/study_map.tiff", w=850, h=550)
grid::grid.newpage()
vpmain <- grid::viewport(width = 1, height = 1, x = 0.5, y = 0.5)
vpinset <- grid::viewport(width = 0.25, height = 0.25, x = 0.8, y = 0.85)
print(study_map, vp = vpmain)
print(inset_map, vp = vpinset)

Smoke PM2.5

Kate has estimated PM2.5 at 15x15 km grid level using kriged surface site monitors for the entire US from 2010 to 2015. I have limited the gridded PM2.5 to Colorado Front Range study grid. I have also estimated population-weighted PM2.5 for all Colorado counties (although I don’t think I’ll use it for this project). There were concerns about the accuracy of the krig PM in areas outside of the front range communities since most PM stations are on the Front Range.

I am also going to set up this lag function here to create lagged days that will be used in the analysis.

Note: 2020-05-31 swiching back to 3 day lag period

# defining a lag function that I will use later in the distributed lag model
funlag <- function(var, n=5){
  var <- enquo(var)
  indices <- seq_len(n)
  map( indices, ~quo(lag(!!var, !!.x)) ) %>% 
    set_names(sprintf("%s_lag%d", rlang::quo_text(var), indices))
}

I’d like to find the most likely grid for each county and plot each grid for each county instead of the population-weighted county estimates. I’m going to find the county name for which has the largest proportion of intersection of the grid.

# identifying the county that most of the grid lies in
grid_county_id <- st_intersection(study_grid, county_clip) %>% 
  dplyr::select(GRID_ID, NAME, fips) 

# proportion each grid in county
grid_prop <- as.numeric(st_area(grid_county_id)/st_area(study_grid))

# find the county id that contains the highest proportion for each grid
grid_county_final <- grid_county_id %>% 
  mutate(prop_int = grid_prop,
         GRID_ID = as.character(GRID_ID)) %>% 
  group_by(GRID_ID) %>% 
  filter(prop_int == max(prop_int))

# remove geometry
st_geometry(grid_county_final) <- NULL

Consider removing this following section.

# front range fips
fr_fips <- c("08001", "08005", "08013", "08031", "08035", "08041",
             "08059", "08069", "08123")

# read pm data 
co_pm <- read_csv("../../data/smoke/1015-county_popwt_pm.csv") %>% 
  # limiting to colorado counties only by reading 1st 2 digs of 5-dig fips code
  filter(fips %in% fr_fips) %>% 
  # renaming variable pm_smk to pm_diff; more accurate description of var
  rename(pm_diff = pm_smk) %>% 
  # us
  mutate(county_name = case_when(fips == '08001' ~ 'Adams',
                                 fips == '08005' ~ 'Arapahoe',
                                 fips == '08013' ~ 'Boulder',
                                 fips == '08031' ~ 'Denver',
                                 fips == '08035' ~ 'Douglas',
                                 fips == '08041' ~ 'El Paso',
                                 fips == '08059' ~ 'Jefferson',
                                 fips == '08069' ~ 'Larimer',
                                 fips == '08123' ~ 'Weld'),
         day = as.factor(weekdays(date)), # create day of week based on var
         weekend = ifelse(day %in% c("Saturday", "Sunday"), 1, 0),
         month = as.factor(lubridate::month(date)), # extract month as factor
         year = as.factor(lubridate::year(date)), # extract year as factor
         season = as.factor(case_when(month %in% c(12, 1, 2) ~ "winter",
                                      month %in% c(3:5) ~ "spring",
                                      month %in% c(6:8) ~ "summer",
                                      month %in% c(9:11)~ "fall")),
         # creating binary smoke to require 50% of county with smoke overhead
         # and difference between estimate and seasonal background to be >0
         # and to be within the month of April to Octoboer
         smoke0_hms = ifelse(pm_diff > 0 & month %in% c(5:10) & hms > 0.5,1,0),
         smoke5_hms = ifelse(pm_diff > 5 & month %in% c(5:10) & hms > 0.1,1,0),
         smoke10_hms = ifelse(pm_diff > 10 & month %in% c(5:10) & hms > 0.1,1,0),
         smoke15_hms = ifelse(pm_diff > 15 & month %in% c(5:10) & hms > 0.1,1,0),
         # transforming pm kriged estimates to a 10 unit increase in pm
         pm = pm_krig/10, 
         # continuous pm smoke accounting for hms
         cpm_smk = ifelse(hms > 0.1 & month %in% c(5:10), pm_diff, 0),
         cpm_smk = ifelse(cpm_smk < 0, 0, cpm_smk),
         aqi_warning = ifelse(aqi_cat %in% 
            c('Unhealthy', 'Unhealthy for Sensitive Groups'), 1, 0)) %>%  
  # sorting by fips and date to estimate lag for each county by date
  arrange(fips, date) %>% 
  # group by fips 
  group_by(fips) %>%
  # apply funlag to create lagged estimates
  mutate(., !!!funlag(pm,5), !!!funlag(smoke0_hms,5), !!!funlag(smoke5_hms,5),
         !!!funlag(smoke10_hms,5), !!!funlag(smoke15_hms,5), 
         !!!funlag(temp_f, 5)) %>% 
  dplyr::select(-state, -month) # removing state and month to bind with casecross

Front Range Grid Estimates of Smoke PM2.5 2010 to 2015

To try and estimate PM2.5 from smoke vs. other sources, I’ve taken the estimated kriged value of PM2.5 and subtracted off the seasonal background of PM2.5 and only consider this difference to be from smoke if there is smoke in the atmospheric column using the HMS grids. For county population-weighted values, I use a rather loose definition of at least 10% of the county with smoke overhead. The grid-level estimate of smoke is probably more reliable. I’ll show some quick diagnostic plots below.

Here is a plot of the population-weighted smoke PM2.5 for each Front Range county.

pm_plot <- ggplot(data=co_pm, aes(x=date, y=cpm_smk)) +
  geom_point(size = 0.5, color = "#6441a5") +
  facet_wrap(~county_name) +
  xlab('Date') +
  ylab('County Smoke PM2.5 ug/m^3') +
  theme_minimal()
# plot
print(pm_plot)

This definition of smoke seems reasonable. I do have some concerns with the Kriged model in that there isn’t enough stations to capture spatial variations. For example, the 2012 Waldo Canyon Fire likely affected south Front Range counties like El Paso a little after the High Park Fire likely affected northern front range counties like Larmier. Does the county population-weighted time series reflect this? All the smoke peaks across look uniform across Front Range counties. This could be explained by the kriging process if certain sites are really driving the smoothed surfaces.

Maps of the study area and specific fires may help understand the exposure series. I will work on these after some input.

# code chunk that reads in grid pm2.5 and prepares some lagged variables
# for joining with health outcomes data.

# read new grid/wrfgrid key
grid_key <- read_csv('../../data/shapefiles/wrfgrid_key.csv', 
                     col_types = list(GRID_ID = col_character(),
                                      WRFGRID_ID = col_character())) %>% 
  dplyr::select(-COLX, -ROWY)

# read grid pm
grid_pm <- read_csv('../../data/smoke/1015-grid_pm.csv', 
                    col_types = list(GRID_ID = col_character())) %>% 
  # create some variables         
  mutate(pm_diff_g = pm25_grid - sbg_pm_grid,
         month = as.factor(lubridate::month(date)), # extract month as factor
         gsmk5_hms = ifelse(pm_diff_g > 5 & month %in% c(4:10) & hms_grid == 1,
                            1,0),
         gsmk10_hms = ifelse(pm_diff_g > 10 & month %in% c(4:10) & hms_grid == 1,
                             1,0),
         gsmk15_hms = ifelse(pm_diff_g > 15 & month %in% c(4:10) & hms_grid == 1,
                             1,0),
         # continuous pm smoke accounting for hms
         gpm_smk = ifelse(hms_grid == 1 & month %in% c(5:10), pm_diff_g, 0),
         gpm_smk = ifelse(gpm_smk < 0, 0, gpm_smk),
         # transforming pm smke estimates to a 10 unit increase in pm
         gpm_smk10unit = gpm_smk/10) %>% 
         # sorting by GRID_ID and date to estimate lag for each county by date
         arrange(GRID_ID, date) %>% 
         # group by GRID_ID 
         group_by(GRID_ID) %>%
         # apply funlag to create lagged estimates
         mutate(., !!!funlag(gpm_smk10unit,5), !!!funlag(gsmk5_hms,5),
               !!!funlag(gsmk10_hms,5), !!!funlag(gsmk15_hms,5),
               !!!funlag(temp_f_grid, 5)) %>% 
         dplyr::select(-month) %>% 
         # filter to grids in study area
         filter(GRID_ID %in% frontrange_grid$GRID_ID) %>% 
         # join county id
         left_join(dplyr::select(grid_county_final, GRID_ID, NAME, fips), 
                   by = 'GRID_ID')

Grid smoke values in each front range county.

# front range fips
fr_fips <- c("08001", "08005", "08013", "08031", "08035", "08041",
             "08059", "08069", "08123")

pm_plot <- ggplot(data=filter(grid_pm, fips %in% fr_fips), 
                  aes(x=date, y=gpm_smk, group = GRID_ID)) +
  geom_point(size = 0.5, color = "#6441a5") +
  facet_wrap(~NAME) +
  xlab('Date') +
  ylab(expression(paste("Smoke PM"[2.5]," in ", mu,"g/m"^3))) +
  theme_minimal()
# plot
print(pm_plot)

Summary statistics of PM2.5 in grid.

grid_pm %>%
  ungroup() %>% 
  summarize(mean_pm25 = mean(pm25_grid), max_pm25 = max(pm25_grid),
            min_pm25 = 0)

Wildfire season summary stats.

grid_pm %>%
  mutate(month = lubridate::month(date)) %>% 
  filter(month %in% 5:10) %>% 
  ungroup() %>% 
  summarize(mean_pm25 = mean(pm25_grid), max_pm25 = max(pm25_grid),
            min_pm25 = 0, mean_smk = mean(gpm_smk), max_smk = max(gpm_smk))

Reading ozone estimates.

ozone <- read_csv('../../data/smoke/1015-frontrange_kriged_o3.csv') %>% 
  mutate(GRID_ID = as.character(GRID_ID)) %>% 
  # sorting by GRID_ID and date to estimate lag for each county by date
  arrange(GRID_ID, date) %>% 
  # group by GRID_ID 
  group_by(GRID_ID) %>%
  # apply funlag to create lagged estimates
  mutate(., !!!funlag(o3_8hr_max_ppb,5))

Hospitalizations and Mortality

I’ve created time-stratified case-crossover data frames for both mortality and hospitalization data provided by CDPHE with reference periods within a month of the observation. The justification for a ‘month’ period is appropriate for mortality I think, as I think it’s reasonable counter factual window in when a person could have died. A longer period like we’ve previously used like the entire wildfire season may not be as appropriate for mortality for a person who is already at risk of death.

Hospitalizations

# load casecross list
load("../../data/health/1015-co_morbidity_casecross_list.RData")
# load icd9
load("../../data/health/icd9_outcome_vectors.RData")

Cleaning hospitalization data and joining with both grid-level and count-level PM2.5 data. I consider the following inpatient hospitalization events with admitting source via the emergency room or urgent care (as a way to get at acute events): all respiratory, asthma, chronic obstructive pulmonary disease (COPD), acute bronchitis, pneumonia, all cardiovascular events, arrhythmia, cerebrovascular, heart failure, ischemic heart disease, and myocardial infraction (which is a subcategory of IHD).

Some notes for the Colorado hospitalization data is that unlike Washington and Oregon, I have no way of identifying multiple events per person so I am not able to limit to first event and I am assuming each event is independent of others. Also, unlike Washington and Oregon, I’ve decided to go with monthly referent periods rather than the whole wildfire season. Possible suggestion would be to run sensitivity analyses to see if this has an impact.

# extract names
outcome <- c('All Respiratory', 'Asthma', 'COPD', 'Acute Bronchitis',
              'Pneumonia', 'All CVD', 'Arrhythmia', 'Cerebrovascular',
              'Heart Failure', 'Ischemic Heart Disease', 
              'Myocardial Infarction')

# reduce case-crossover list to only summer months and join GRID ID
co_hosp_list <- co_morbidity_cc_list %>% 
  # desired format to make sure it's right
  map(~ mutate(., outcome = as.numeric(as.character(outcome)),
               date = as.Date(as.character(date)),
               month = as.factor(lubridate::month(date))) %>% 
      # filter out 2016; I don't have pm data yet
      filter(date <= "2015-12-30") %>% 
      filter(month %in% 5:10) %>% 
      filter(fips %in% fr_fips) %>% 
      dplyr::select(-state, -month) %>% 
      # join with grid key
      left_join(grid_key, by = 'WRFGRID_ID') %>% 
      left_join(co_pm, by = c("fips", "date")) %>% 
      # left join grid pm
      left_join(grid_pm, by = c("GRID_ID", "date")) %>% 
      left_join(ozone, by = c("GRID_ID", "date")) %>% 
      # filter to front range grid ids
      filter(GRID_ID %in% frontrange_grid$GRID_ID)) %>% 
  # add outcome name to each dataframe
  map2(.x = ., .y = outcome, ~mutate(.x, out_name = .y))

Colorado Hospitalization Respiratory and Cardiovascular Time Series

Creating and plotting the time series of respiratory and cardiovascular events for the state of Colorado.

# create time series of hospitalizations
ts_hosp <- co_morbidity_cc_list[c(1,6)] %>% 
  # using map 2 to add outcome names to each dataframe
  map2(., names(co_morbidity_cc_list[c(1,6)]), function(df, name){
    counts <- df %>% 
      filter(outcome == 1) %>% 
      group_by(date) %>% 
      summarise(n = n()) %>% 
      mutate(outcome = name,
             date = as.Date(date))
    }) %>% 
  # bind dataframes
  map_dfr(.,rbind)
ts_hosp_plot <- ggplot(data=ts_hosp, aes(x=date, y=n)) +
  geom_point() +
  facet_wrap(~outcome) +
  theme_minimal()

ts_hosp_plot

The cardiovascular hospitalization is pretty uniform, with some noticeable dips at the end of the year of 2013 and when ICD9 switches to ICD10 at the end of the time series. I’d say this is probably a systemic thing that has to do with coding or how hospitals recorded events rather than an actual big drop in the rate of CVD hospitalizations. Although may Kirk or some others at CDPHE may know.

Counts of Hospitalization Events

Number of inpatient hospitalization events that occurred in Colorado Front Range Communities between 2010 and 2015 in the May to October Months. Also note that October of 2015 is when ICD9 was switched to ICD10, so I do not have that last month.

strata <- c('All', 'Sex', 'Age')
# estimate hospitalizations
hosp_counts <- co_hosp_list %>% 
  map_dfr(.,function(df){
    map_dfr(strata, function(x){
    if(x == 'All'){
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name) %>% 
        summarise(n_events = n()) %>% 
        mutate(strata = x) %>% 
        dplyr::select(out_name, strata, n_events)
    } else if (x == 'Sex'){
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name, sex) %>% 
        summarise(n_events = n()) %>% 
        rename(strata = sex) %>% 
        dplyr::select(out_name, strata, n_events)
    } else {
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name, age_cat) %>% 
        summarise(n_events = n()) %>% 
        rename(strata = age_cat) %>% 
        dplyr::select(out_name, strata, n_events)
    }
    }) # end strata map
  }) %>% # end outcome list map
  spread(strata, n_events) %>% 
  dplyr::select(out_name, All, F, M, age_under_15, age_15_to_65, age_over_65) %>% 
  mutate_at(vars(F:age_over_65), funs(round((./All)*100,1)))

# print death counts
knitr::kable(hosp_counts, caption = 'Number of Inpatient Events')
Number of Inpatient Events
out_name All F M age_under_15 age_15_to_65 age_over_65
Acute Bronchitis 1884 41.9 58.1 65.3 20.1 14.5
All CVD 68356 47.8 52.2 0.3 40.2 59.5
All Respiratory 46585 50.8 49.2 7.9 44.9 47.2
Arrhythmia 9958 48.8 51.2 0.4 31.9 67.6
Asthma 6816 53.0 47.0 11.5 65.8 22.6
Cerebrovascular 10484 34.4 65.6 0.1 48.3 51.6
COPD 6656 53.6 46.4 0.2 37.3 62.5
Heart Failure 10090 33.7 66.3 0.1 48.2 51.7
Ischemic Heart Disease 10930 50.2 49.8 0.4 27.5 72.1
Myocardial Infarction 13660 53.6 46.4 0.3 33.7 65.9
Pneumonia 14694 51.2 48.8 5.4 40.7 53.9

Assessment of Assigned PM2.5

Quick assessment of how well my smoke PM2.5 definition at the grid level correlates at the county level for front range counties. I’ve used the cardiovascular hospitalizations (as they are most common) to plot the assigned county smoke PM2.5 vs grid smoke PM2.5 It looks like the values are similar in some respect, and sometimes not. I would assume the grid level is more accurate since my definition requires the grid be completely impacted by smoke based on the HMS plume.

ggplot(data = filter(co_hosp_list[[6]], !is.na(gpm_smk)), 
                     aes(x=cpm_smk, y =gpm_smk)) +
  geom_point() +
  ylab('Grid Smoke PM2.5') +
  xlab('County Smoke PM2.5') +
  ggtitle('Comparison of County vs Grid Smoke PM2.5') +
  theme_minimal()

Check how many events have a county PM assigned but not a grid PM assigned.

missing <- co_hosp_list[[6]] %>% 
  filter(outcome == 1) %>% 
  mutate(county_miss = ifelse(is.na(pm_krig), 1, 0),
         grid_miss = ifelse(is.na(pm25_grid), 1, 0)) %>% 
  group_by(county_miss, grid_miss) %>% 
  summarize(n = n()) %>% 
  mutate(prop = round(n/(69064+8716), 2))

knitr::kable(missing, caption = 'CVD Hospitalization Proportion Missing Grid PM2.5')
CVD Hospitalization Proportion Missing Grid PM2.5
county_miss grid_miss n prop
0 0 68356 0.88

Roughly 10% of subjects with a CVD hospitalization are missing grid PM2.5 which I’m guessing is because Kirk was not able to Geocode for what ever reason.

Same Day Association between Smoke PM2.5 and Inpatient Hospitalizations

First things first is to look at the same day association between an increase in smoke PM2.5 and the risk for an inpatient hospitalization.

# extract names
outcome <- c('All Respiratory', 'Asthma', 'COPD', 'Acute Bronchitis',
              'Pneumonia', 'All CVD', 'Arrhythmia', 'Cerebrovascular',
              'Heart Failure', 'Ischemic Heart Disease', 
              'Myocardial Infarction')
# outcome order
out_order <- c('All Respiratory', 'Asthma', 'COPD', 'Acute Bronchitis',
              'Pneumonia', 'All CVD', 'Arrhythmia', 'Cerebrovascular',
              'Heart Failure', 'Ischemic Heart Disease', 
              'Myocardial Infarction')

# same day results
same_day <- co_hosp_list %>% 
  map_dfr(., function(df){
    out_name <- unique(df$out_name)
    result <- broom::tidy(clogit(outcome ~ gpm_smk10unit + o3_8hr_max_ppb +
                                 temp_f_grid + strata(id),
                                 data = df)) %>% 
      filter(term == 'gpm_smk10unit') %>% 
      dplyr::select(term, estimate, conf.low, conf.high) %>% 
      mutate_at(vars(estimate:conf.high), exp)
  }) %>%
  bind_cols(data.frame(outcome), .) %>% 
  mutate(outcome = forcats::fct_relevel(outcome, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular")))

Same-day plot.

# plot
plot <- ggplot(data=same_day, aes(x=outcome, y = estimate, colour = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: Smoke PM"[2.5], " > 10 ", mu, "g/m"^3))) +
  xlab("Outcome") +
  theme_bw() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))

print(plot)

There is no association for most outcomes. Notable observation would be the inverse association between increasing smoke and a lower likelihood of a respiratory hospitalization.

Here is a table for the point estimated and 95%CIs from the plot above. The gpm_smk10unit stands for grid PM2.5 smoke estimate, 10 unit increase.

knitr::kable(dplyr::select(same_day, outcome, estimate:conf.high) %>%
    mutate_at(vars(estimate:conf.high), ~round(.x, 3)), 
  caption = 'Same Day Association with Smoke and Cardiopulmonary Hospitalizations')
Same Day Association with Smoke and Cardiopulmonary Hospitalizations
outcome estimate conf.low conf.high
All Respiratory 0.950 0.901 1.003
Asthma 0.989 0.854 1.145
COPD 0.912 0.791 1.050
Acute Bronchitis 1.141 0.872 1.495
Pneumonia 1.028 0.935 1.130
All CVD 0.988 0.948 1.029
Arrhythmia 1.036 0.935 1.149
Cerebrovascular 1.048 0.949 1.158
Heart Failure 1.055 0.954 1.167
Ischemic Heart Disease 0.972 0.877 1.079
Myocardial Infarction 0.980 0.895 1.074

Cumulative effect for a 10 ug/m^3 of smoke exposure over 5 days

distribute_that_lag <- function(lag_mod, strata, exp_b) {
  # output pm basis estimates
  parms <- broom::tidy(lag_mod) %>% 
    filter(stringr::str_detect(term, strata)) %>% 
    dplyr::select(estimate) %>% 
    as_vector()
  # output estimate names for cov matrix
  names <- stringr::str_subset(names(lag_mod$coefficients), strata)
  
  # estimate associations
  est <- exp_b %*% parms
  # estimate standard error for each interval
  # time variable
  time <- ((rep(1:length(est))-1))
  # covariance matrix for knots 
  cov_mat <- as.matrix(vcov(lag_mod))[names, names]
  # estimate variance of spline
  var <- exp_b %*% cov_mat %*% t(exp_b)
  # estimate lag ----
  # estimate standard error for each lag day for smoke
  l_se <- sqrt(diag(var))
  # calculate lower and upper bound for smoke
  l_est_l95 <- est + (l_se*qnorm(1-0.975))
  l_est_u95 <- est + (l_se*qnorm(0.975))
  l_type <- "lag"
  # lag dataframe
  l_df <- data.frame(strata, l_type, time, 
                     exp(est), exp(l_est_l95), exp(l_est_u95), 
                     row.names = NULL) 
  # assign column names
  colnames(l_df) <- c("strata", "type", "time", 
                      "odds_ratio", "lower_95", "upper_95")
  # cumulative estimates
  c_est <- sapply(seq_along(est), function(x){
    sum(est[1:x])
  })
  # stderr cumulative effect smk
  c_se <- sapply(seq_along(c_est), function(y){
    sqrt(sum(var[1:y,1:y]))
  })
  # estimate 95% CI
  c_l95 <- c_est+(c_se*qnorm(1-0.975))
  c_u95 <- c_est+(c_se*qnorm(0.975))
  # type
  c_type <- "cumulative"
  # return dataframe
  c_df <- data.frame(strata, c_type, time, exp(c_est), 
                     exp(c_l95), exp(c_u95), row.names = NULL) 
  # assign column names
  colnames(c_df) <- c("strata", "type", "time", 
                      "odds_ratio", "lower_95", "upper_95")
  # bind lagged and cumulative 
  lag_est <- rbind(l_df, c_df) %>% 
    mutate(strata = as.character(strata),
           type = as.character(type))
  # return lagged estimate
  return(lag_est)
} # end lag estimate function

Contstrained distributed lag results.

# estimating lagged effects

# 5 days lag 
#rep_times <- 6*2
# 3 days lag

rep_times <- 4*2
out_rep <- rep(outcome, each =rep_times)

dl_results  <- lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3)) # 3 day lag
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5)) # 5 day lag
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3 )) # 3 day lag
                               #temp_f_grid_lag4, temp_f_grid_lag5)) # 5 day lag
  
  # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3)) # 3 day lag,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5)) # 5 day lag
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                    data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b = exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) 
# get the cumulative effect over 6 days
cumulative_results <- filter(dl_results, type == 'cumulative' & time == 3) %>% 
    dplyr::select(-strata, -time) %>% 
    mutate(outcome = forcats::fct_relevel(outcome, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular")))

Cumulative effect of 0 to 3 days of smoke exposure for a 10 ug/m^3.

# plot
plot <- ggplot(data = cumulative_results, 
               aes(x=outcome, y = odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))

print(plot)

There may be an association with hospitalizations for asthma and certain cardiovascular events (cerebrovascular, heart failure, IHD) for every 10 ug/m^3 of smoke PM2.5 over the course of 6 days of smoke exposure.

Table of cumulative effects results for a 10 ug/m^3 increase in smoke PM2.5

knitr::kable(
  dplyr::select(cumulative_results, outcome, type:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)), 
  caption = 'Cumulative effect of smoke on Cardiopulmonary Hospitalizations')
Cumulative effect of smoke on Cardiopulmonary Hospitalizations
outcome type odds_ratio lower_95 upper_95
All Respiratory cumulative 1.032 0.961 1.108
Asthma cumulative 1.243 1.035 1.493
COPD cumulative 1.039 0.859 1.258
Acute Bronchitis cumulative 1.336 0.925 1.930
Pneumonia cumulative 1.081 0.949 1.231
All CVD cumulative 1.028 0.974 1.086
Arrhythmia cumulative 1.005 0.871 1.159
Cerebrovascular cumulative 1.138 0.994 1.303
Heart Failure cumulative 1.150 1.003 1.320
Ischemic Heart Disease cumulative 1.116 0.976 1.276
Myocardial Infarction cumulative 1.031 0.914 1.164

Constrained distributed lag effects for a 10 ug/m^3 increase in smoke

Creating a plot showing the individual lagged day effects on cardiopulmonary hospitalizations. This is a constrained distributed lag model, meaning all lagged days are accounted for in the model.

constrained_lag_results <- dl_results %>% 
  mutate(outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular"))) %>% 
  filter(type == 'lag')

# constrained distributed lag
dl_hosp_plot <- ggplot(data=constrained_lag_results, 
    aes(x=time, y=odds_ratio, group = group, color = group, fill = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95), color = 'transparent',
              alpha = 0.5) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  scale_fill_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(dl_hosp_plot)

For all respiratory and asthma, there appears to be a inverse association (or trending towards an inverse association) on the same day of exposure, with a steady increase in the risk on day 3. I think the acute bronchitis and pneumonia signals are noisy and the day 3 association is not likely real (suggesting that maybe my model decision is not great). It could be that I need to further reduce the lagged days evaluated to maybe 0 to 2 since the smoke events are pretty brief.

knitr::kable(
  dplyr::select(constrained_lag_results, outcome, time:upper_95) %>% 
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
  caption = 'Distributed Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations')
Distributed Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations
outcome time odds_ratio lower_95 upper_95
All Respiratory 0 0.931 0.880 0.986
All Respiratory 1 0.965 0.929 1.003
All Respiratory 2 1.026 0.987 1.067
All Respiratory 3 1.118 1.061 1.179
Asthma 0 0.892 0.759 1.047
Asthma 1 1.028 0.930 1.136
Asthma 2 1.134 1.022 1.257
Asthma 3 1.196 1.040 1.375
COPD 0 0.901 0.779 1.043
COPD 1 0.973 0.879 1.077
COPD 2 1.049 0.950 1.159
COPD 3 1.130 0.978 1.305
Acute Bronchitis 0 1.197 0.883 1.623
Acute Bronchitis 1 0.827 0.667 1.026
Acute Bronchitis 2 0.894 0.720 1.110
Acute Bronchitis 3 1.510 1.118 2.039
Pneumonia 0 1.011 0.915 1.117
Pneumonia 1 0.978 0.912 1.049
Pneumonia 2 1.003 0.935 1.076
Pneumonia 3 1.090 0.991 1.198
All CVD 0 0.969 0.928 1.012
All CVD 1 1.019 0.989 1.049
All CVD 2 1.033 1.002 1.064
All CVD 3 1.009 0.968 1.051
Arrhythmia 0 1.059 0.950 1.181
Arrhythmia 1 0.993 0.918 1.074
Arrhythmia 2 0.969 0.897 1.047
Arrhythmia 3 0.986 0.884 1.099
Cerebrovascular 0 1.015 0.913 1.129
Cerebrovascular 1 1.040 0.967 1.120
Cerebrovascular 2 1.045 0.972 1.125
Cerebrovascular 3 1.030 0.927 1.145
Heart Failure 0 1.025 0.920 1.141
Heart Failure 1 1.033 0.959 1.113
Heart Failure 2 1.040 0.965 1.120
Heart Failure 3 1.045 0.939 1.164
Ischemic Heart Disease 0 0.911 0.815 1.018
Ischemic Heart Disease 1 1.041 0.969 1.118
Ischemic Heart Disease 2 1.099 1.021 1.182
Ischemic Heart Disease 3 1.071 0.969 1.183
Myocardial Infarction 0 0.968 0.878 1.068
Myocardial Infarction 1 0.977 0.915 1.044
Myocardial Infarction 2 1.012 0.946 1.083
Myocardial Infarction 3 1.077 0.986 1.176

Unconstrained Lag Model for Hospitalizations

Running an unconstrained lag model where I model the cooresponding lagged day of smoke, adjusting for the same lag day of temperature and ozone. This model is for comparison with our 2012 Washington paper and to see how sensitive our distributed lag models are.

NOTE 2020-05-31 CHANGED THIS BACK TO A 3 DAY LAG PERIOD

# repeat outcomes 4 or 6 times based on lag period

# 5 day lag
#out_rep <- rep(outcome, each = 6)
# day to repeat
#day <- rep(0:5, times = 11)


# 3 day lag
out_rep <- rep(outcome, each = 4)
# day to repeat
day <- rep(0:3, times = 11)

# lag day vector
lag_days <- c('', '_lag1', '_lag2', '_lag3') #, '_lag4', '_lag5')

# map across lagged days
hosp_uc_lag_results <- map_dfr(co_hosp_list, function(df){
  data <- df
    results <- map_dfr(lag_days, function(x){
      # define function
      f <- as.formula(paste('outcome~',
        paste(paste0(c('gpm_smk10unit', 'o3_8hr_max_ppb', 'temp_f_grid'), 
                     x), collapse = '+'),
        '+strata(id)'))
      r <- broom::tidy(clogit(f, data = data)) %>% 
      filter(term == paste0('gpm_smk10unit',x)) %>% 
      dplyr::select(term, estimate, conf.low, conf.high) %>% 
      mutate_at(vars(estimate:conf.high), exp)
    })
  }) %>% 
  cbind(out_rep, day, .) %>% 
  mutate(outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular"))) 

Plot of unconstrained lag results for hospitalizations.

uc_plot <- ggplot(data=hosp_uc_lag_results, aes(x=day, y=estimate, 
                                                group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(uc_plot)

In the context of the distributed lag models, this may help confirm some of the associations. I think the respiratory and asthma trends are supported by these plots. Same with ischemic heart disease.

Unconstrained lag associations table.

knitr::kable(dplyr::select(hosp_uc_lag_results, outcome, day, estimate:conf.high) %>%
    mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
  caption = 'Unconstrained Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations')
Unconstrained Lag Daily Associations with Smoke and Cardiopulmonary Hospitalizations
outcome day estimate conf.low conf.high
All Respiratory 0 0.950 0.901 1.003
All Respiratory 1 0.993 0.943 1.046
All Respiratory 2 1.040 0.988 1.094
All Respiratory 3 1.106 1.053 1.162
Asthma 0 0.989 0.854 1.145
Asthma 1 1.101 0.968 1.251
Asthma 2 1.197 1.049 1.366
Asthma 3 1.259 1.109 1.430
COPD 0 0.912 0.791 1.050
COPD 1 1.069 0.935 1.222
COPD 2 1.005 0.880 1.148
COPD 3 1.148 1.004 1.312
Acute Bronchitis 0 1.141 0.872 1.495
Acute Bronchitis 1 0.979 0.741 1.293
Acute Bronchitis 2 1.099 0.837 1.442
Acute Bronchitis 3 1.374 1.049 1.801
Pneumonia 0 1.028 0.935 1.130
Pneumonia 1 1.018 0.928 1.117
Pneumonia 2 1.046 0.952 1.149
Pneumonia 3 1.084 0.992 1.185
All CVD 0 0.988 0.948 1.029
All CVD 1 1.050 1.010 1.091
All CVD 2 1.015 0.975 1.056
All CVD 3 1.029 0.991 1.069
Arrhythmia 0 1.036 0.935 1.149
Arrhythmia 1 1.020 0.920 1.132
Arrhythmia 2 0.954 0.859 1.059
Arrhythmia 3 0.984 0.889 1.089
Cerebrovascular 0 1.048 0.949 1.158
Cerebrovascular 1 1.122 1.020 1.235
Cerebrovascular 2 1.050 0.953 1.157
Cerebrovascular 3 1.078 0.978 1.189
Heart Failure 0 1.055 0.954 1.167
Heart Failure 1 1.122 1.018 1.237
Heart Failure 2 1.050 0.952 1.159
Heart Failure 3 1.090 0.987 1.203
Ischemic Heart Disease 0 0.972 0.877 1.079
Ischemic Heart Disease 1 1.104 1.007 1.210
Ischemic Heart Disease 2 1.101 1.000 1.213
Ischemic Heart Disease 3 1.124 1.025 1.232
Myocardial Infarction 0 0.980 0.895 1.074
Myocardial Infarction 1 1.002 0.918 1.094
Myocardial Infarction 2 1.018 0.932 1.112
Myocardial Infarction 3 1.069 0.984 1.160

Sex-Specific Stratification

Using the distributed lag model for each sex strata. Also modifying code to pull out n cases analyzed.

# estimating lagged effects

#rep_times <- 2*2*6 # 5  days

rep_times <- 2*2*4 # 3 days

out_rep <- rep(outcome, each = rep_times)

sex_strata <- c('F', 'M')

sex_results  <- map_dfr(co_hosp_list, function(df){
  results <- map_dfr(sex_strata, function(x){
   data <- df %>% 
     filter(sex == x) %>% 
     mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

    # create lagged matrix
    pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3 ))# 3day lag, 
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
    # temp matrix
    temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                                 temp_f_grid_lag2, temp_f_grid_lag3)) # ,
                                 ## temp_f_grid_lag4, temp_f_grid_lag5))
    
    # ozone matrix
    o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                               o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 ))# , 
                               #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
    
    # define lagged basis spline
    exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
    # pm basis
    pm_basis <- pm_mat %*% exp_b
    # temp basis
    temp_basis <- temp_mat %*% exp_b
    # ozone basis
    o3_basis <- o3_mat %*% exp_b
    # run lagged model
    lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                      data = data)
  
    # estimate lag estimate
    lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>% 
      mutate(sex = x)
    return(lag_est)
  }) 
}) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) 

Sex-specific plots of the cumulative effect.

NOTE FOR SHERYL 2020-05-31: Are these the estimates you wanted for sex specific asthma? The cumulative effect?

# subset to cumulative results
cumulative_sex_strata <- sex_results %>% 
  filter(type == 'cumulative' & time == 3) %>% 
  mutate(sex_long = if_else(sex == 'F', 'Female', 'Male'),
         outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular"))) 

# plot
sex_plot <- ggplot(data=cumulative_sex_strata, aes(x=sex_long, y=odds_ratio, 
                                        group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Sex") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(sex_plot)

Sex-specific cumulative effect estimates table.

knitr::kable(dplyr::select(cumulative_sex_strata, outcome, sex_long, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Sex-specific cumulative effect of smoke on hosptializations.')
Sex-specific cumulative effect of smoke on hosptializations.
outcome sex_long odds_ratio lower_95 upper_95
All Respiratory Female 0.999 0.904 1.104
All Respiratory Male 1.067 0.964 1.182
Asthma Female 1.124 0.869 1.453
Asthma Male 1.375 1.056 1.791
COPD Female 0.988 0.751 1.299
COPD Male 1.093 0.836 1.427
Acute Bronchitis Female 1.009 0.579 1.757
Acute Bronchitis Male 1.737 1.052 2.868
Pneumonia Female 1.061 0.886 1.271
Pneumonia Male 1.101 0.913 1.328
All CVD Female 1.059 0.979 1.145
All CVD Male 1.003 0.930 1.081
Arrhythmia Female 1.114 0.913 1.358
Arrhythmia Male 0.904 0.735 1.112
Cerebrovascular Female 1.190 0.941 1.505
Cerebrovascular Male 1.114 0.943 1.315
Heart Failure Female 1.195 0.938 1.523
Heart Failure Male 1.130 0.955 1.336
Ischemic Heart Disease Female 1.139 0.943 1.375
Ischemic Heart Disease Male 1.095 0.905 1.325
Myocardial Infarction Female 1.142 0.973 1.340
Myocardial Infarction Male 0.902 0.749 1.085

Age-Specific Stratification

Age category stratified results.

#rep_times <- 3*2*6 # 5 days lag
rep_times <- 3*2*4 # 3 days lag


# estimating lagged effects
out_rep <- rep(outcome, each = rep_times)

age_strata <- c('age_under_15', 'age_15_to_65', 'age_over_65')

age_results  <- map_dfr(co_hosp_list, function(df){
  results <- map_dfr(age_strata, function(x){
   data <- df %>% 
     filter(age_cat == x) %>% 
     mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))
   
    # create lagged matrix
    pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3 )) #,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
    # temp matrix
    temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                                 temp_f_grid_lag2, temp_f_grid_lag3)) # ,
                                 # temp_f_grid_lag4, temp_f_grid_lag5))
    
    # ozone matrix
    o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                               o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3)) # ,
                               # o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
    
    # define lagged basis spline
    exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
    # pm basis
    pm_basis <- pm_mat %*% exp_b
    # temp basis
    temp_basis <- temp_mat %*% exp_b
    # ozone basis
    o3_basis <- o3_mat %*% exp_b
    # run lagged model
    lag_mod <- tryCatch(clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                      data = data))
  
    # estimate lag estimate
    lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>% 
      mutate(age_cat = x)
    return(lag_est)
  })
}) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) 

Age-specific plots of the cumulative effect.

# subset to cumulative results
cumulative_age_strata <- age_results %>% 
  filter(type == 'cumulative' & time == 3) %>% 
  mutate(age_long = forcats::fct_relevel(
      case_when(age_cat == 'age_under_15' ~ 'Age < 15',
                age_cat == 'age_15_to_65' ~ 'Age 15 to 64',
                age_cat == 'age_over_65' ~ 'Age > 64'),
      c('Age < 15', 'Age 15 to 64', 'Age > 64')),
         outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular"))) %>% 
  filter(upper_95 < 3.5)

# plot
age_plot <- ggplot(data=cumulative_age_strata, aes(x=age_long, y=odds_ratio, 
                                        group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Sex") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))

print(age_plot)

Age-specific cumulative effect estimates table.

knitr::kable(dplyr::select(cumulative_age_strata, outcome, age_long, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Age-specific cumulative effect of smoke on hosptializations.')
Age-specific cumulative effect of smoke on hosptializations.
outcome age_long odds_ratio lower_95 upper_95
All Respiratory Age < 15 1.220 0.923 1.613
All Respiratory Age 15 to 64 1.001 0.899 1.115
All Respiratory Age > 64 1.036 0.935 1.147
Asthma Age < 15 0.491 0.236 1.020
Asthma Age 15 to 64 1.292 1.035 1.613
Asthma Age > 64 1.537 1.058 2.232
COPD Age 15 to 64 1.177 0.879 1.574
COPD Age > 64 0.949 0.737 1.223
Acute Bronchitis Age < 15 1.840 1.185 2.858
Acute Bronchitis Age 15 to 64 0.354 0.122 1.031
Acute Bronchitis Age > 64 1.080 0.367 3.176
Pneumonia Age < 15 1.001 0.502 1.995
Pneumonia Age 15 to 64 0.965 0.776 1.200
Pneumonia Age > 64 1.154 0.977 1.363
All CVD Age < 15 0.817 0.359 1.858
All CVD Age 15 to 64 1.026 0.940 1.119
All CVD Age > 64 1.032 0.963 1.107
Arrhythmia Age 15 to 64 0.848 0.648 1.108
Arrhythmia Age > 64 1.077 0.908 1.276
Cerebrovascular Age 15 to 64 1.149 0.940 1.405
Cerebrovascular Age > 64 1.135 0.944 1.364
Heart Failure Age 15 to 64 1.174 0.956 1.441
Heart Failure Age > 64 1.139 0.945 1.372
Ischemic Heart Disease Age 15 to 64 1.069 0.817 1.399
Ischemic Heart Disease Age > 64 1.132 0.969 1.322
Myocardial Infarction Age 15 to 64 1.117 0.907 1.376
Myocardial Infarction Age > 64 0.990 0.853 1.148

Mortality

Since we have mortality data, I’m also going to evaluate the association between grid-level smoke PM2.5 and cardiopulmonary mortality. I’ve created time-stratified case-crossover data frames of cardiopulmonary mortality events with referent period within the same month. I’ve also limit mortality case-crossover events to Front Range counties from May to October from 2010 to 2015.

# load casecrossover list -----
load("../../data/health/co_mortality_cc_list.RData")
# load mortality outcome list
load("../../data/health/icd10_outcome.RData")
# extract a vector of outcome names from the icd10 outcomes list 
outcomes <- names(icd10_outcomes)

# reduce case-crossover list to only summer months and join pm
co_mortality_list <- casecross_list %>% 
  # desired format to make sure it's right
  map(~ mutate(., outcome = as.numeric(as.character(outcome)),
               date = as.Date(as.character(date_of_death)),
               month = as.factor(lubridate::month(date))) %>% 
      # filter out 2016; I don't have pm data yet
      filter(date <= "2015-12-30") %>% 
      filter(fips %in% fr_fips)) %>% 
  # add outcome name to each dataframe
  map2(.x = ., .y = outcomes, ~mutate(.x, out_name = .y))

Time Series of Respiratory and Cardiovascular Deaths in Colorado

Time series of events for respiratory and cvd only. I also aggregated across front range counties for sufficient numbers. Looking for any potential time trends that may need to be accounted for.

# estimate deaths 
ts_death <- co_mortality_list %>% 
  map_dfr(. , function(df){
    counts <- df %>% 
      filter(outcome == 1) %>% 
      group_by(out_name, date) %>% 
      summarise(n = n())
    }) %>% 
  filter(out_name %in% c('resp', 'cvd'))

Mortality time-series plot.

ts_death_plot <- ggplot(data=ts_death, aes(x=date, y=n)) +
  geom_point() +
  facet_wrap(~out_name) +
  theme_minimal()

ts_death_plot

There is a general increase in cardiovascular deaths over time, which I’m guessing corresponds with the population increase of Colorado. Rates probably would show a more stable rate. Interesting observation in the respiratory related morbidity is the spike at the end of 2014, which I think is because of the bad flu season. I remember Kirk saying this.

I am going to limit mortality to May to October months in 2010 to 2015 and join with PM2.5 estimates.

# extract names
cause_death <- c('Respiratory', 'Asthma', 'COPD', 'CVD', 'Heart Failure',
                 'Cardiac Arrest', 'Ischemic Heart Disease', 
                 'Myocardial Infarction', 'Cerebrovascular')

# reduce case-crossover list to only summer months and join GRID ID
co_death_list <- co_mortality_list %>% 
  # filter out 2016; I don't have pm data yet
  map(~ filter(., date <= "2015-12-30") %>% 
      filter(month %in% 5:10) %>% 
      filter(fips %in% fr_fips) %>% 
      mutate(WRFGRID_ID = as.character(wrfgrid_id),
             # age category
             age_cat = case_when(age < 15 ~ 'age_under_15',
                                 age >= 15 & age < 65 ~ 'age_15_to_65',
                                 age >= 65 ~ 'age_over_65')) %>% 
      dplyr::select(-month, -wrfgrid_id) %>%
      # join with grid key
      left_join(grid_key, by = 'WRFGRID_ID') %>% 
      left_join(co_pm, by = c("fips", "date")) %>% 
      # left join grid pm
      left_join(grid_pm, by = c("GRID_ID", "date")) %>% 
      left_join(ozone, by = c("GRID_ID", "date"))) %>% 
  # add outcome name to each dataframe
  map2(.x = ., .y = cause_death, ~mutate(.x, out_name = .y))

Strata Counts

Number of cardiopulmonary deaths in Front Range counties during this time period.

strata <- c('All', 'Sex', 'Age')

# estimate deaths 
death_counts <- co_death_list %>% 
  map_dfr(.,function(df){
    map_dfr(strata, function(x){
    if(x == 'All'){
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name) %>% 
        summarise(n_events = n()) %>% 
        mutate(strata = x) %>% 
        dplyr::select(out_name, strata, n_events)
    } else if(x == 'Sex'){
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name, sex) %>% 
        summarise(n_events = n()) %>% 
        rename(strata = sex) %>% 
        dplyr::select(out_name, strata, n_events)
    } else {
      counts <- df %>% 
        filter(outcome == 1) %>% 
        group_by(out_name, age_cat) %>% 
        summarise(n_events = n()) %>% 
        rename(strata = age_cat) %>% 
        dplyr::select(out_name, strata, n_events)
    }
    }) # end strata map
  }) %>% # end outcome list map
  spread(strata, n_events) %>% 
  dplyr::select(out_name, All, F, M, age_under_15, age_15_to_65, age_over_65) %>% 
  mutate_at(vars(F:age_over_65), funs(round((./All)*100,1)))

# print death counts
knitr::kable(death_counts, caption='Front Range Cardiopulmonary Deaths and Strata Proportion')
Front Range Cardiopulmonary Deaths and Strata Proportion
out_name All F M age_under_15 age_15_to_65 age_over_65
Asthma 101 68.3 31.7 4.0 44.6 51.5
Cardiac Arrest 157 58.0 42.0 NA 18.5 81.5
Cerebrovascular 3443 60.1 39.9 0.3 14.6 85.1
COPD 4056 51.2 48.8 NA 12.4 87.6
CVD 18122 49.9 50.1 0.2 20.0 79.8
Heart Failure 1395 59.5 40.5 0.1 6.3 93.6
Ischemic Heart Disease 7520 39.9 60.1 0.0 23.2 76.8
Myocardial Infarction 1948 40.9 59.1 NA 25.4 74.6
Respiratory 7025 50.5 49.5 0.4 14.9 84.7

Assessing how many CVD deaths are missing a grid assignment but were assigned a county PM2.5. Not that many missing (~2%).

missing <- co_death_list[[4]] %>% 
  filter(outcome == 1) %>% 
  mutate(county_miss = ifelse(is.na(pm_krig), 1, 0),
         grid_miss = ifelse(is.na(pm25_grid), 1, 0)) %>% 
  group_by(county_miss, grid_miss) %>% 
  summarize(n = n()) %>% 
  mutate(prop = round(n/(17720+402),2))

knitr::kable(missing, caption = 'CVD mortality missing grid PM2.5')
CVD mortality missing grid PM2.5
county_miss grid_miss n prop
0 0 17493 0.97
0 1 629 0.03

Same Day Association between Smoke and Death

Same analysis as hospitalizations where likelihood of a cardiopulmonary death is regressed a 10 ug/m^3 increase in smoke PM2.5. Model accounts for within subject variability and adjusts for temperature.

# same day results
same_day_death <- co_death_list %>% 
  map_dfr(., function(df){
    out_name <- unique(df$out_name)
    result <- broom::tidy(clogit(outcome ~ gpm_smk10unit + o3_8hr_max_ppb + 
                                 temp_f_grid + strata(id),
                                 data = df)) %>% 
      filter(term == 'gpm_smk10unit') %>% 
      dplyr::select(term, estimate, conf.low, conf.high) %>% 
      mutate_at(vars(estimate:conf.high), exp) %>% 
      mutate(outcome_name = out_name)
  }) %>% 
  mutate(outcome_name = forcats::fct_relevel(outcome_name, .$outcome_name),
         group = as.factor(ifelse(outcome_name %in% c('Respiratory', 'Asthma', 'COPD'),
                        'Respiratory', 'Cardiovascular')))

Mortality same day association plot.

death_plot <- ggplot(data=same_day_death, aes(x=outcome_name, y = estimate, 
                                              colour = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Underlying Cause of Death") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))

print(death_plot)

Note there are very few deaths with underlying cause of death as asthma and cardiac arrest and thus a lot of variability. I’m going to plot without these two outcomes so it’s easier to read.

Underlying cause of death plot I’ll use in the paper.

death_plot2 <- ggplot(data = filter(same_day_death, 
                                    !(outcome_name %in% c('Asthma', 'Cardiac Arrest'))),  
                                    aes(x=outcome_name, y = estimate, colour = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Underlying Cause of Death") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75))

print(death_plot2)

No significant associations with a 10 ug/m^3 increase in smoke PM2.5 and cardiopulmonary morbidity.

Here is a table of the same-day mortality association.

knitr::kable(dplyr::select(same_day_death, outcome_name, estimate:conf.high) %>%
    mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
             caption = "Same Day Association with Smoke and Mortality")
Same Day Association with Smoke and Mortality
outcome_name estimate conf.low conf.high
Respiratory 1.072 0.943 1.219
Asthma 1.746 0.788 3.868
COPD 1.014 0.862 1.192
CVD 1.024 0.944 1.111
Heart Failure 0.935 0.699 1.252
Cardiac Arrest 2.932 1.111 7.740
Ischemic Heart Disease 0.968 0.854 1.099
Myocardial Infarction 1.168 0.900 1.516
Cerebrovascular 1.135 0.940 1.370

Cumulative effect for a > 10 ug/m^3 of smoke exposure over 4 days on risk of death

Looking at cumulative exposure of 0 to 3 lagged days of smoke exposure on risk for cardiopulmonary death.

# rep_times <- 6*2 # 5 days
rep_times <- 4*2 # 3 days

cod <- rep(cause_death, each = rep_times)

dl_death_results  <- lapply(co_death_list, function(x){
  # output dataframe from list
  data <- x %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3 )) #,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3)) # ,
                               # temp_f_grid_lag4, temp_f_grid_lag5))
  
  # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #, 
                             # o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) 

Cumulative effect death.

cumulative_death_results <- filter(dl_death_results, 
                                   type == 'cumulative' & time == 3) %>% 
    dplyr::select(-strata, -time) %>% 
    mutate(outcome = forcats::fct_relevel(cod, cause_death),
         group = as.factor(if_else(cod %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) %>% 
    filter(!(outcome %in% c('Asthma', 'Cardiac Arrest')))

Plot of cumulative effect.

# plot
cumulative_death_plot <- ggplot(data = cumulative_death_results, 
               aes(x=outcome, y = odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10 ", mu,"g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))

print(cumulative_death_plot)

Removed asthma and cardiac arrest as they were highly variable.

Table of the cumulative effects of smoke on underlying cause of death.

knitr::kable(dplyr::select(cumulative_death_results,outcome, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = "Cumulative Effect of Smoke on Mortality")
Cumulative Effect of Smoke on Mortality
outcome odds_ratio lower_95 upper_95
Respiratory 1.130 0.944 1.352
COPD 0.950 0.746 1.208
CVD 1.046 0.938 1.167
Heart Failure 0.847 0.577 1.244
Ischemic Heart Disease 1.051 0.886 1.246
Myocardial Infarction 1.190 0.841 1.684
Cerebrovascular 1.043 0.809 1.346

Distributed Lag Estiamtes for Mortality

Plot of daily estimates from the distributed lag model for mortality.

constrained_lag_results_death <- dl_death_results %>% 
  mutate(outcome = forcats::fct_relevel(cod, cause_death),
         group = as.factor(if_else(cod %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) %>% 
  filter(!(outcome %in% c('Asthma', 'Cardiac Arrest'))) %>% 
  filter(type == 'lag')

# constrained distributed lag
dl_death_plot <- ggplot(data=constrained_lag_results_death, 
    aes(x=time, y=odds_ratio, group = group, color = group, fill = group)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = lower_95, ymax = upper_95), color = 'transparent',
              alpha = 0.5) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Underlying Cause of Death", values = c("#ff00cc", "darkblue")) +
  scale_fill_manual("Underlying Cause of Death", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(dl_death_plot)

If you look at the effect sizes, I do think there may be some interesting associations with all cause respiratory and CVD. It also appears that maybe heart attacks and strokes could be increased on the day of smoke exposure.

But there is too much noise at this point I think to make any conclusions if there is an association or not with smoke and CVD or respiratory deaths in this data.

Daily distributed lagged estimates table.

knitr::kable(dplyr::select(constrained_lag_results_death, outcome, time:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Lagged Associations between Smoke and Underlying Cause of Death')
Lagged Associations between Smoke and Underlying Cause of Death
outcome time odds_ratio lower_95 upper_95
Respiratory 0 1.014 0.883 1.163
Respiratory 1 1.079 0.982 1.186
Respiratory 2 1.064 0.968 1.169
Respiratory 3 0.970 0.841 1.120
COPD 0 1.024 0.860 1.221
COPD 1 0.992 0.870 1.131
COPD 2 0.972 0.853 1.106
COPD 3 0.962 0.789 1.173
CVD 0 0.995 0.912 1.087
CVD 1 1.033 0.974 1.097
CVD 2 1.030 0.970 1.094
CVD 3 0.987 0.909 1.072
Heart Failure 0 0.856 0.616 1.189
Heart Failure 1 1.154 0.916 1.454
Heart Failure 2 1.114 0.887 1.399
Heart Failure 3 0.770 0.549 1.079
Ischemic Heart Disease 0 0.925 0.807 1.062
Ischemic Heart Disease 1 1.038 0.944 1.141
Ischemic Heart Disease 2 1.072 0.975 1.180
Ischemic Heart Disease 3 1.020 0.895 1.163
Myocardial Infarction 0 1.126 0.847 1.496
Myocardial Infarction 1 1.054 0.871 1.275
Myocardial Infarction 2 1.011 0.835 1.223
Myocardial Infarction 3 0.993 0.748 1.317
Cerebrovascular 0 1.161 0.947 1.422
Cerebrovascular 1 0.984 0.856 1.130
Cerebrovascular 2 0.930 0.807 1.073
Cerebrovascular 3 0.982 0.816 1.183

Unconstrained Lag

Unconstrained lag for mortality.

rep_times <- 4
# repeat outcomes 4 times
ucod <- rep(cause_death, each = rep_times)
# day to repeat
day <- rep(0:3, times = 9)

# lag day vector
lag_days <- c('', '_lag1', '_lag2', '_lag3') # , '_lag4', '_lag5')

# map across lagged days
death_uc_lag_results <- map_dfr(co_death_list, function(df){
  data <- df
    results <- map_dfr(lag_days, function(x){
      # define function
      f <- as.formula(paste('outcome~',
        paste(paste0(c('gpm_smk10unit', 'o3_8hr_max_ppb', 'temp_f_grid'), 
                     x), collapse = '+'),
        '+strata(id)'))
      r <- broom::tidy(clogit(f, data = data)) %>% 
      filter(term == paste0('gpm_smk10unit',x)) %>% 
      dplyr::select(term, estimate, conf.low, conf.high) %>% 
      mutate_at(vars(estimate:conf.high), exp)
    })
  }) %>% 
  cbind(ucod, day, .) %>% 
  mutate(outcome = forcats::fct_relevel(ucod, cause_death),
         group = as.factor(if_else(outcome %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) 

Plot for unconstrained lag.

death_uc_plot <- ggplot(data=death_uc_lag_results, aes(x=day, y=estimate, 
                                                group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(death_uc_plot)

Table for unconstrained lag values.

knitr::kable(dplyr::select(death_uc_lag_results, ucod, day, estimate:conf.high) %>%
    mutate_at(vars(estimate:conf.high), ~round(.x, 3)),
             caption = 'Uncontrained Lag Association between Smoke and Underlying Cause of Death')
Uncontrained Lag Association between Smoke and Underlying Cause of Death
ucod day estimate conf.low conf.high
Respiratory 0 1.072 0.943 1.219
Respiratory 1 1.107 0.980 1.249
Respiratory 2 1.087 0.958 1.233
Respiratory 3 1.022 0.893 1.169
Asthma 0 1.746 0.788 3.868
Asthma 1 2.070 0.640 6.691
Asthma 2 0.455 0.094 2.193
Asthma 3 3.301 1.032 10.559
COPD 0 1.014 0.862 1.192
COPD 1 0.971 0.820 1.149
COPD 2 0.959 0.801 1.148
COPD 3 0.935 0.776 1.127
CVD 0 1.024 0.944 1.111
CVD 1 1.041 0.961 1.127
CVD 2 1.052 0.972 1.138
CVD 3 1.007 0.933 1.087
Heart Failure 0 0.935 0.699 1.252
Heart Failure 1 1.037 0.774 1.389
Heart Failure 2 1.005 0.767 1.318
Heart Failure 3 0.837 0.623 1.126
Cardiac Arrest 0 2.932 1.111 7.740
Cardiac Arrest 1 1.457 0.697 3.044
Cardiac Arrest 2 1.110 0.415 2.970
Cardiac Arrest 3 2.005 0.865 4.650
Ischemic Heart Disease 0 0.968 0.854 1.099
Ischemic Heart Disease 1 1.051 0.928 1.190
Ischemic Heart Disease 2 1.088 0.959 1.234
Ischemic Heart Disease 3 1.051 0.932 1.187
Myocardial Infarction 0 1.168 0.900 1.516
Myocardial Infarction 1 1.114 0.867 1.431
Myocardial Infarction 2 1.095 0.866 1.385
Myocardial Infarction 3 1.037 0.803 1.340
Cerebrovascular 0 1.135 0.940 1.370
Cerebrovascular 1 0.988 0.818 1.195
Cerebrovascular 2 0.980 0.816 1.176
Cerebrovascular 3 0.961 0.807 1.145

Sex-Specific Stratification

Using the distributed lag model for each sex strata. Also modifying code to pull out n cases analyzed.

rep_times <- 2*2*4 # 3 days

# estimating lagged effects
ucod <- rep(cause_death, each = rep_times)

sex_strata <- c('F', 'M')

sex_death_results  <- map_dfr(co_death_list, function(df){
  results <- map_dfr(sex_strata, function(x){
   data <- df %>% 
     filter(sex == x) %>% 
     mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))
   
    # create lagged matrix
    pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3)) #,
                             # gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
    # temp matrix
    temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                                 temp_f_grid_lag2, temp_f_grid_lag3)) # ,
                               #  temp_f_grid_lag4, temp_f_grid_lag5))
    
    # ozone matrix
    o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                               o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3)) #,
                               # o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
    
    # define lagged basis spline
    exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
    # pm basis
    pm_basis <- pm_mat %*% exp_b
    # temp basis
    temp_basis <- temp_mat %*% exp_b
    # ozone basis
    o3_basis <- o3_mat %*% exp_b
    # run lagged model
    lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                      data = data)
  
    # estimate lag estimate
    lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>% 
      mutate(sex = x)
    return(lag_est)
  }) 
}) %>% 
  # bind in outcome names
  bind_cols(data.frame(ucod), .) 

Sex-specific plots of the cumulative effect.

# subset to cumulative results
cumulative_sex_death_strata <- sex_death_results %>% 
  filter(type == 'cumulative' & time == 3) %>% 
  mutate(sex_long = if_else(sex == 'F', 'Female', 'Male'),
         outcome = forcats::fct_relevel(ucod, cause_death),
         group = as.factor(if_else(ucod %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) %>% 
  filter(!(outcome %in% c('Asthma', 'Cardiac Arrest'))) 

# plot
sex_plot <- ggplot(data=cumulative_sex_death_strata, aes(x=sex_long, y=odds_ratio, 
                                        group = group, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Sex") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(sex_plot)

Sex-specific estimates table.

knitr::kable(dplyr::select(cumulative_sex_strata, outcome, sex_long, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Sex-specific cumulative effect of smoke on hosptializations.')
Sex-specific cumulative effect of smoke on hosptializations.
outcome sex_long odds_ratio lower_95 upper_95
All Respiratory Female 0.999 0.904 1.104
All Respiratory Male 1.067 0.964 1.182
Asthma Female 1.124 0.869 1.453
Asthma Male 1.375 1.056 1.791
COPD Female 0.988 0.751 1.299
COPD Male 1.093 0.836 1.427
Acute Bronchitis Female 1.009 0.579 1.757
Acute Bronchitis Male 1.737 1.052 2.868
Pneumonia Female 1.061 0.886 1.271
Pneumonia Male 1.101 0.913 1.328
All CVD Female 1.059 0.979 1.145
All CVD Male 1.003 0.930 1.081
Arrhythmia Female 1.114 0.913 1.358
Arrhythmia Male 0.904 0.735 1.112
Cerebrovascular Female 1.190 0.941 1.505
Cerebrovascular Male 1.114 0.943 1.315
Heart Failure Female 1.195 0.938 1.523
Heart Failure Male 1.130 0.955 1.336
Ischemic Heart Disease Female 1.139 0.943 1.375
Ischemic Heart Disease Male 1.095 0.905 1.325
Myocardial Infarction Female 1.142 0.973 1.340
Myocardial Infarction Male 0.902 0.749 1.085

Age-Specific Estimates

rep_times <- 2*3*4 # 3 day lag
# repeat cause of death name 
ucod <- rep(cause_death[c(1,4)], each = rep_times)

# define age strata to loop through
age_strata <- c('age_under_15', 'age_15_to_65', 'age_over_65')

age_death_results  <- map_dfr(co_death_list[c(1,4)], function(df){
  results <- map_dfr(age_strata, function(x){
   data <- df %>% 
     filter(age_cat == x) %>% 
     mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))
   
    # create lagged matrix
    pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3)) # ,
                             # gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
    # temp matrix
    temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                                 temp_f_grid_lag2, temp_f_grid_lag3)) # ,
                                 # temp_f_grid_lag4, temp_f_grid_lag5))
    
    # ozone matrix
    o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                               o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3)) #,
                               # o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
    
    # define lagged basis spline
    exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
    # pm basis
    pm_basis <- pm_mat %*% exp_b
    # temp basis
    temp_basis <- temp_mat %*% exp_b
    # ozone basis
    o3_basis <- o3_mat %*% exp_b
    # run lagged model
    lag_mod <- clogit(outcome ~ pm_basis + o3_basis + temp_basis + strata(id), 
                      data = data)
  
    # estimate lag estimate
    lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) %>% 
      mutate(age_cat = x)
    return(lag_est)
  }) 
}) %>% 
  bind_cols(data.frame(ucod), .)

Age-specific plots of the cumulative effect.I’m going to cut out deaths under 15 because there aren’t many of them and the variance is high.

# subset to cumulative results
cumulative_age_death_strata <- age_death_results %>% 
  filter(type == 'cumulative' & time == 3) %>% 
  filter(age_cat != 'age_under_15') %>% 
  mutate(age_long = forcats::fct_relevel(
    case_when(age_cat == 'age_under_15' ~ 'Age < 15',
              age_cat == 'age_15_to_65' ~ 'Age 15 to 64',
              age_cat == 'age_over_65' ~ 'Age > 64'),
    c('Age < 15', 'Age 15 to 64', 'Age > 64')),
    outcome = forcats::fct_relevel(if_else(ucod == 'Respiratory', 'Respiratory',
                                           'Cardiovascular'), 
                                   c('Cardiovascular', 'Respiratory')))

# plot
age_plot <- ggplot(data=cumulative_age_death_strata, aes(x=age_long, y=odds_ratio, 
                                        group = outcome, color = outcome)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  facet_wrap(~outcome, scales = 'free_y', ncol = 4) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Age") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        panel.grid.minor = element_blank(),
        strip.background = element_blank())

print(age_plot)

Table of age-specific associations.

knitr::kable(dplyr::select(cumulative_age_death_strata, 
                    outcome, age_long, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Age-specific associations between smoke and underlying cause of death')
Age-specific associations between smoke and underlying cause of death
outcome age_long odds_ratio lower_95 upper_95
Respiratory Age 15 to 64 0.941 0.565 1.568
Respiratory Age > 64 1.163 0.960 1.410
Cardiovascular Age 15 to 64 1.185 0.930 1.509
Cardiovascular Age > 64 1.014 0.896 1.146

Binary Smoke Classifier

The continuous definition of smoke may have some misclassification bias, particularly where there is difficulty distinguishing from PM2.5 impacted by smoke and a high anthropocentric PM2.5 day that also happens to have some smoke in the atmospheric column. I am going to try out a binary outcome of > 10 ug/m^3 PM2.5 with HMS overhead as a binary indicator for smoke and look at the association between multiple days of a binary smoke exposure and the risk of cardiopulmonary morbidity.

Hospitalizations and Binary Smoke

rep_times <- 2*4 # 3 days

out_rep <- rep(outcome, each = rep_times)

hosp_binary_results  <- lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gsmk10_hms_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gsmk10_hms, gsmk10_hms_lag1, 
                             gsmk10_hms_lag2, gsmk10_hms_lag3)) #, 
                             # gsmk10_hms_lag4, gsmk10_hms_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3)) #,
                               # temp_f_grid_lag4, temp_f_grid_lag5))
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b

  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) 
# get the cumulative effect over 6 days
cumulative_bin_results <- filter(hosp_binary_results, 
                                 type == 'cumulative' & time == 3) %>% 
    dplyr::select(-strata, -time) %>% 
    mutate(outcome = forcats::fct_relevel(outcome, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular")))

# plot
cbin_hosp_plot <- ggplot(data = cumulative_bin_results, 
               aes(x=outcome, y = odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: Smoke PM"[2.5], " > 10 ug/m"^3))) +
  xlab("Outcome") +
  ggtitle("Hospitalization Cumulative Smoke Exposure 0 to 3 Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))

print(cbin_hosp_plot)

There is a significant association with this binary cutoff and asthma over 0 to 3 days. Some of the cardiovascular outcomes that were trending towards ‘significance’ were attenuated to the null here.

Mortality

rep_times <- 2*4
  
cod <- rep(cause_death, each = rep_times)

binary_death_results  <- lapply(co_death_list, function(x){
  # output dataframe from list
  data <- x %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gsmk10_hms, gsmk10_hms_lag1, 
                             gsmk10_hms_lag2, gsmk10_hms_lag3)) #,
                             #gsmk10_hms_lag4, gsmk10_hms_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3))#,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  temp_basis <-
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) 
# get the cumulative effect over 6 days
c_bin_death_results <- filter(binary_death_results, 
                                   type == 'cumulative' & time == 3) %>% 
    dplyr::select(-strata, -time) %>% 
    mutate(outcome = forcats::fct_relevel(cod, cause_death),
         group = as.factor(if_else(cod %in% cause_death[1:3],
         "Respiratory", "Cardiovascular"))) %>% 
    filter(!(outcome %in% c('Asthma', 'Cardiac Arrest')))

# plot
cbin_death_plot <- ggplot(data = c_bin_death_results, 
               aes(x=outcome, y = odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: Smoke PM"[2.5], " > 10 ug/m"^3))) +
  xlab("Outcome") +
  ggtitle("Mortality Cumulative Smoke Exposure 0 to 3 Lagged Days") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.8, vjust = 0.75,
                                   size = 10))

print(cbin_death_plot)

No association with mortality.

Fire/Smoke Event-Specific

Looking at specific fires. I think isolating to specific counties impacted by specific fires gives me more confidence in the kriged models to understand the exposure series. We’ve also used this approach in Washington and Oregon studies. I’m going to run separate models in each section, but plot estimates side by side in a couple plots.

Since there are fewer deaths for some of the subcategories and I’m reducing further to specific times and counties, I’m going to only look at respiratory and cardiovascular general categories of underlying cause of death.

Four-Mile Canyon Fire 2010

The first fire I’ll look at is the Four-Mile Canyon Fire that occurred west of Boulder. It started on September 7, 2010 and contained on September 13th. I will limit the time frame to 2010-05-01 to 2010-10-31 and to Boulder County only. I’m open to adding additional counties known to be impacted by wildfire smoke.

rep_times <- 2*4
out_rep <- rep(outcome, each = rep_times)

mile4_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    # filter to boulder
    filter(fips.x == '08013') %>% 
    filter(date >= '2010-05-01' & date <= '2010-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3)) # ,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3)) # ,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3)) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)
  
  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Four Mile 2010')
rep_times <- 2*4
cod <- rep(names(co_death_list[c(1,4)]), each=rep_times)

mile4_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  x <- co_death_list[[1]]
  data <- x %>%     
    # filter to boulder
    filter(fips.x == '08013') %>% 
    filter(date >= '2010-05-01' & date <= '2010-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3 )) #,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3 )) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Four Mile 2010')

Waldo Canyon 2012

Waldo Canyon started on June 23rd 2012 and burned until July 10th 2012 near Colorado Springs. Limiting to events from El Paso County to assess impact of smoke. Are there other counties I should consider?

Limiting hospitalizations and deaths to May 2012 through August 2012. I’m consider expanding time frame and other counties too.

rep_times <- 2*4
out_rep <- rep(outcome, each = rep_times)

waldo_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    # filter to el paso
    filter(fips.x == '08041') %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3)) #,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3 )) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3)) # ,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Waldo Canyon 2012')
rep_times <- 2*4
cod <- rep(names(co_death_list[c(1,4)]), each=rep_times)

waldo_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    # filter to el paso
    filter(fips.x == '08041') %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3 )) #,
                             # gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3))#,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Waldo Canyon 2012')

High Park Fire

For this fire, I’m limiting hospitalizations and deaths to May 2012 through August 2012 occurring in Larmier county.

rep_times <- 2*4
out_rep <- rep(outcome, each = rep_times)

hipark_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    # filter to larimer
    filter(fips.x == '08069') %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))
  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3))#,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3)) # ,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'High Park 2012')
rep_times <- 2*4
cod <- rep(names(co_death_list[c(1,4)]), each=rep_times)

hipark_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    # filter to el paso
    filter(fips.x == '08069') %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3 ))# ,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3)) # ,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'High Park 2012')

2012 Local Smoke

2012 local fires for all grid cells in study domain.

rep_times <- 2*4 # 3 day
out_rep <- rep(outcome, each = rep_times)

local_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3)) # ,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3)) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 ))#,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Local 2012')
rep_times <- 2*4
cod <- rep(names(co_death_list[c(1,4)]), each=rep_times)

local_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    filter(date >= '2012-05-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3))#,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3 )) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Local 2012')

2015 Transported Smoke

Fires in Canada and Washington led to the transported smoke event that affected the Front Range communities in Colorado in 2015. I have limited this analysis to 2015-05-01 to 2015-10-31 (note hospitalizations only go until 2015-10-01). I included all Front Range counties.

rep_times <- 2*4
out_rep <- rep(outcome, each = rep_times)

trans_hosp_cont <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    filter(date >= '2015-05-01' & date <= '2015-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3)) #,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3 )) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Transport 2015')
rep_times <- 2*4

cod <- rep(names(co_death_list[c(1,4)]), each=rep_times)

trans_death_cont <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    filter(date >= '2015-05-01' & date <= '2015-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3 )) #,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3 )) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Transport 2015')

Different Event and Hospitalization Plots

Plots of association for each hospitalization outcome by event. I left out Four Mile Canyon fire since there were so few events the estimates were unstable. This can probably be fixed by adding in other counties that were impacted by smoke.

# bind event dataframes together
hosp_event <- bind_rows(waldo_hosp_cont, hipark_hosp_cont, local_hosp_cont,
                        trans_hosp_cont) %>% 
  filter(type == 'cumulative' & time == 3) %>% 
  dplyr::select(-strata, -time) %>% 
  mutate(outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular")),
         # ordering fire event
         event = forcats::fct_relevel(event, c('Waldo Canyon 2012', 'High Park 2012',
                                               'Local 2012', 'Transport 2012'))) %>% 
  # filter out acute bronchitis
  filter(outcome != 'Acute Bronchitis')

# plot
hosp_event_plot <- ggplot(data = hosp_event, 
                          aes(x=outcome, y=odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  facet_wrap(~event, scales = 'free_y') +  
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),        
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))
  
print(hosp_event_plot)

I think this plot is kind of interesting for asthma, where there is a positive association with the 2015 transport smoke and an inverse association with the 2012 local fires for the study domain. I don’t see an association with the individual fires, which may suggest the inverse association may be due to exposure misclassification? Or it could be real where poeple are taking risk-avoiding behaviors.I think Sheryl and I hypothesized that there might be an effect here because the first couple days of the smoke event were really hard to tell there was actual smoke and not fog. Also note that I included all front range counties because there were air quality warning up and down the front range. I think we could also do a 2012 to 2015 comparison. This might actually be the cleanest way to do this analysis, with the argument that the Front Range was impacted by smoke from two large fires in 2012 and impacted by smoke from transported smoke.

I left out Four Mile Canyon fire for now since the estimates were unstable due to few events.

Printing out fire events hospitalization table.

knitr::kable(dplyr::select(hosp_event, event, outcome, odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Cumulative association between smoke from specific events and hospitalizations')
Cumulative association between smoke from specific events and hospitalizations
event outcome odds_ratio lower_95 upper_95
Waldo Canyon 2012 All Respiratory 1.158 0.805 1.666
Waldo Canyon 2012 Asthma 0.821 0.294 2.291
Waldo Canyon 2012 COPD 0.981 0.357 2.694
Waldo Canyon 2012 Pneumonia 1.662 0.890 3.106
Waldo Canyon 2012 All CVD 1.077 0.809 1.433
Waldo Canyon 2012 Arrhythmia 0.735 0.330 1.637
Waldo Canyon 2012 Cerebrovascular 1.000 0.490 2.039
Waldo Canyon 2012 Heart Failure 0.986 0.482 2.017
Waldo Canyon 2012 Ischemic Heart Disease 1.544 0.732 3.257
Waldo Canyon 2012 Myocardial Infarction 1.134 0.599 2.149
High Park 2012 All Respiratory 0.987 0.697 1.396
High Park 2012 Asthma 0.940 0.231 3.826
High Park 2012 COPD 0.577 0.186 1.791
High Park 2012 Pneumonia 1.342 0.782 2.301
High Park 2012 All CVD 0.971 0.742 1.270
High Park 2012 Arrhythmia 1.288 0.649 2.556
High Park 2012 Cerebrovascular 1.089 0.577 2.056
High Park 2012 Heart Failure 1.212 0.638 2.304
High Park 2012 Ischemic Heart Disease 0.700 0.322 1.519
High Park 2012 Myocardial Infarction 1.172 0.690 1.991
Local 2012 All Respiratory 0.928 0.825 1.044
Local 2012 Asthma 0.716 0.517 0.993
Local 2012 COPD 0.847 0.621 1.155
Local 2012 Pneumonia 1.149 0.935 1.411
Local 2012 All CVD 0.990 0.904 1.084
Local 2012 Arrhythmia 0.991 0.787 1.248
Local 2012 Cerebrovascular 1.091 0.865 1.376
Local 2012 Heart Failure 1.146 0.905 1.451
Local 2012 Ischemic Heart Disease 1.071 0.846 1.357
Local 2012 Myocardial Infarction 1.051 0.860 1.283
Transport 2015 All Respiratory 1.087 0.968 1.221
Transport 2015 Asthma 1.455 1.093 1.939
Transport 2015 COPD 0.996 0.703 1.411
Transport 2015 Pneumonia 0.925 0.734 1.166
Transport 2015 All CVD 1.065 0.977 1.159
Transport 2015 Arrhythmia 1.017 0.804 1.286
Transport 2015 Cerebrovascular 1.118 0.908 1.376
Transport 2015 Heart Failure 1.126 0.912 1.391
Transport 2015 Ischemic Heart Disease 1.184 0.968 1.447
Transport 2015 Myocardial Infarction 1.081 0.897 1.302

Plotting underlying cause of death by fire events.

# bind event dataframes together
death_event <- bind_rows(waldo_death_cont, hipark_death_cont, local_death_cont,
                        trans_death_cont) %>% 
  filter(type == 'cumulative' & time == 3) %>% 
  dplyr::select(-strata, -time) %>% 
  mutate(outcome = as.factor(if_else(cod == 'resp', 
                                     'Respiratory', 'Cardiovascular')),
         # ordering fire event
         event = forcats::fct_relevel(event, c('Waldo Canyon 2012', 'High Park 2012',
                                               'Local 2012', 'Transport 2012'))) 

# plot
death_event_plot <- ggplot(data = death_event, 
                          aes(x=outcome, y=odds_ratio, color = outcome)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  facet_wrap(~event) +  
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),        
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))
  
print(death_event_plot)

No association with deaths.

Table for mortality for specific events.

knitr::kable(dplyr::select(death_event, outcome, event,odds_ratio:upper_95) %>%
    mutate_at(vars(odds_ratio:upper_95), ~round(.x, 3)),
             caption = 'Cumulative association with smoke and mortality for specific events')
Cumulative association with smoke and mortality for specific events
outcome event odds_ratio lower_95 upper_95
Respiratory Waldo Canyon 2012 0.623 0.203 1.914
Cardiovascular Waldo Canyon 2012 1.141 0.649 2.007
Respiratory High Park 2012 0.275 0.075 1.006
Cardiovascular High Park 2012 0.774 0.435 1.379
Respiratory Local 2012 1.148 0.851 1.551
Cardiovascular Local 2012 1.091 0.907 1.311
Respiratory Transport 2015 0.944 0.700 1.274
Cardiovascular Transport 2015 0.906 0.763 1.077

2020-05-31 SENSITIVITY ANALYSIS OF RESTRICING TIME 2012; 2015 fires:

Restricting to August 1st to Oct 31s

rep_times <- 2*4
out_rep <- rep(outcome, each = rep_times)

trans_hosp_cont_v2 <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    filter(date >= '2015-08-01' & date <= '2015-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3)) #,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3 )) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Transport 2015: August 1st to October 31st')
rep_times <- 2*4

cod <- rep(names(co_death_list[c(1,4)]), each=rep_times)

trans_death_cont_v2 <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    filter(date >= '2015-08-01' & date <= '2015-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3 )) #,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3 )) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Transport 2015: August 1st to October 31st')

Local fires v2

rep_times <- 2*4 # 3 day
out_rep <- rep(outcome, each = rep_times)

local_hosp_cont_v2 <-lapply(co_hosp_list, function(x){
  # output dataframe from list
  data <- x %>% 
    filter(date >= '2012-08-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3)) # ,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3)) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 ))#,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(out_rep), .) %>% 
  mutate(event = 'Local 2012: August 1st to October 31st')
rep_times <- 2*4
cod <- rep(names(co_death_list[c(1,4)]), each=rep_times)

local_death_cont_v2 <- lapply(co_death_list[c(1,4)], function(x){
  # output dataframe from list
  data <- x %>%     
    filter(date >= '2012-08-01' & date <= '2012-10-31') %>% 
    mutate(date = as.Date(date),
           outcome = as.numeric(as.character(outcome))) %>%
    # remove missing lagged data
    filter(!is.na(gpm_smk10unit_lag5))

  # create lagged matrix
  pm_mat <- as.matrix(dplyr::select(data, gpm_smk10unit, gpm_smk10unit_lag1, 
                             gpm_smk10unit_lag2, gpm_smk10unit_lag3))#,
                             #gpm_smk10unit_lag4, gpm_smk10unit_lag5))
  
  # temp matrix
  temp_mat <- as.matrix(dplyr::select(data, temp_f_grid, temp_f_grid_lag1,
                               temp_f_grid_lag2, temp_f_grid_lag3 )) #,
                               #temp_f_grid_lag4, temp_f_grid_lag5))
    # ozone matrix
  o3_mat <- as.matrix(dplyr::select(data, o3_8hr_max_ppb, o3_8hr_max_ppb_lag1,
                             o3_8hr_max_ppb_lag2, o3_8hr_max_ppb_lag3 )) #,
                             #o3_8hr_max_ppb_lag4, o3_8hr_max_ppb_lag5))
  
  # define lagged basis spline
  exp_b <- splines::ns(0:(ncol(pm_mat)-1), df = 3, intercept = T)
  # pm basis
  pm_basis <- pm_mat %*% exp_b
  # temp basis
  temp_basis <- temp_mat %*% exp_b
  # ozone basis
  o3_basis <- o3_mat %*% exp_b
  
  # run lagged model
  lag_mod <- clogit(outcome ~ pm_basis + temp_basis + o3_basis + strata(id), data = data)

  # estimate lag estimate
  lag_est <- distribute_that_lag(lag_mod, strata = "pm", exp_b=exp_b) 
  return(lag_est)
  }) %>%  #end lappply
  # bind rows
  map_dfr(.,rbind) %>% 
  # bind in outcome names
  bind_cols(data.frame(cod), .) %>% 
  mutate(event = 'Local 2012: August 1st to October 31st')

Hospitalization plots.

# bind event dataframes together
hosp_event_v2 <- bind_rows(waldo_hosp_cont, hipark_hosp_cont, local_hosp_cont_v2,
                        trans_hosp_cont_v2) %>% 
  filter(type == 'cumulative' & time == 3) %>% 
  dplyr::select(-strata, -time) %>% 
  mutate(outcome = forcats::fct_relevel(out_rep, out_order),
         group = as.factor(if_else(outcome %in% out_order[1:5],
         "Respiratory", "Cardiovascular")),
         # ordering fire event
         event = forcats::fct_relevel(event, c('Waldo Canyon 2012', 'High Park 2012',
                                               'Local 2012: August 1st to October 31st',
                                               'Transport 2012: August 1st to October 31st'))) %>% 
  # filter out acute bronchitis
  filter(outcome != 'Acute Bronchitis')

# plot
hosp_event_plot_v2 <- ggplot(data = hosp_event_v2, 
                          aes(x=outcome, y=odds_ratio, color = group)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  facet_wrap(~event, scales = 'free_y') +  
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),        
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))
  
print(hosp_event_plot_v2)

Deaths transport vs local limited to august 1st to october 31st

Plotting underlying cause of death by fire events.

# bind event dataframes together
death_event_v2 <- bind_rows(waldo_death_cont, hipark_death_cont, local_death_cont_v2,
                        trans_death_cont_v2) %>% 
  filter(type == 'cumulative' & time == 3) %>% 
  dplyr::select(-strata, -time) %>% 
  mutate(outcome = as.factor(if_else(cod == 'resp', 
                                     'Respiratory', 'Cardiovascular')),
         # ordering fire event
         event = forcats::fct_relevel(event, c('Waldo Canyon 2012', 'High Park 2012',
                                               'Local 2012: August 1st to October 31st', 
                                               'Transport 2012: August 1st to October 31st'))) 

# plot
death_event_plot_v2 <- ggplot(data = death_event_v2, 
                          aes(x=outcome, y=odds_ratio, color = outcome)) +
  geom_point() +
  geom_errorbar(aes(ymin=lower_95, ymax=upper_95), width = 0.3) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  facet_wrap(~event) +  
  scale_color_manual("Cardiopulmonary", values = c("#ff00cc", "darkblue")) +
  geom_hline(yintercept = 1, linetype = "dashed", colour = "red") +
  ylab(expression(paste("Odds Ratio: 10", mu, "g/m"^3, " Increase in Smoke PM"[2.5]))) +
  xlab("Outcome") +
  theme_bw()+
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(linetype = "dotted"),
        panel.grid.minor = element_blank(),        
        legend.direction = 'horizontal',
        legend.position = 'bottom',
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=0.95, vjust = 0.75,
                                   size = 10))
  
print(death_event_plot_v2)

Conclusions

I think there could be something interesting looking at 2012 vs. 2015 smoke, where transported smoke could be a risk factor. The hypothesis here could be that people are aware of smoke from local fires since they are publicized, where the transported smoke was not clear to people until there were air quality warnings. Mortality doesn’t seem to be associated with these short-term smoke events.